## This code replicates the analysis in section "Party Organization and Electoral Performance"

## LOGIC OF THE CODE ##
## First produced the diagram (FIGURE 2), with the structure of the analysis
## Loads the dataset of municipal level indicators
## (See authors for details as to how the dataset was assembled)
## For each party/period, it first performs the matching procedure described in the paper
## This produces a total of 9 matched datasets (3 periods X 3 parties)
## Each dataset is then converted into long format for DiD analysis
## Each dataset is then analyzed
## Results are collected into a table (TABLE 2)

rm(list=ls(all=TRUE))
library(reshape)
library(Zelig)
library(foreign)
library(MatchIt)
library(car)
library(xtable)
library(RItools)
setwd("~/Dropbox/Data/Paper-PT/")
savedir <- getwd()  


####################################################
### Create timeline diagram, FIGURE 1, in paper ####
pdf(file=paste(savedir,"/fig_timeline.pdf",sep=""),
	width=8,height=2)
par(mar=c(1,3,1,1))
plot(c(-2.5,8.5),c(-1,1),type="n",ylab="",xlab="",yaxt="n",xaxt="n",bty="n")
abline(h=0)
segments(x0=c(-2,0,2,4,6,8),
		x1=c(-2,0,2,4,6,8),
		y0=rep(-.15,6),y1=rep(.15,6))
text(0,0.4,"Nat'l Elec\n(Year 0)")
text(4,0.4,"Nat'l Elec\n(Year 4)")
text(8,0.4,"Nat'l Elec\n(Year 8)")
text(2,0.4,"Local Elec\n(Year 2)")
text(6,0.4,"Local Elec\n(Year 6)")
axis(side=2,at=0,label="time",las=2)
text(0,-.3,expression(t[pre]))
text(8,-.3,expression(t[post]))
text(2,-.3,expression(t[0]))
text(6,-.3,expression(t[1]))
lines(c(1,5),c(0,0),lwd=5)
axis(side=1,at=c(1,5),labels=NA,line=-2)
text(3,-.8,"Effective 'Treatment' Period")
dev.off()


##### Declare functions that are used to reshape the data and tables #####
did.melt <- function(x,weights=F,pty="all"){ 
	cat("Reshaping the data for DiD analysis...\n");flush.console()
    require(reshape)
    if(weights==T){more.vars<-"weights";
    			   cat(paste("Returning DiD set for party",
    			   pty,"including weights from matching"))
    			   }else{more.vars<-NULL}
    if(pty=="all"|pty==13){
    tmp13 <- melt(x, 
    		id.vars=c("mun","state","pt2000","pt2004","pt2008","pt1996",more.vars), 
    		measure.vars=c("dilma.vs.2010","lula.vs.2006","lula.vs.2002",
    						"lula.vs.1998","lula.vs.1994","pt.vs.1994",
    						"pt.vs.1998","pt.vs.2010","pt.vs.2006","pt.vs.2002"))
    tmp13$TT <- as.factor(as.numeric(gsub("\\D","",tmp13$variable)))
    tmp13$variable <- gsub("(\\w)\\.vs.*$","\\1",tmp13$variable,perl=T)
    tmp13$variable <- gsub("(lula)|(dilma)","ptpres",tmp13$variable,perl=T)
    tmp13$variable <- as.factor(gsub("(\\w)\\..*$","\\1",tmp13$variable,perl=T)) 
    cast.formula <- formula(paste(
    					    paste("mun","state","pt2000","pt2004","pt2008","pt1996","TT",sep="+"),
    					    if(weights==T){"+weights~variable"}else{"~variable"},sep=""))
    did13 <- cast(tmp13,cast.formula)
    did13[,c("ptpres","pt")]<-did13[,c("ptpres","pt")]*100
    did <- did13
    }
   if(pty=="all"|pty==15){
    tmp15 <- melt(x, 
    		id.vars=c("mun","state","pmdb2000","pmdb2004","pmdb2008","pmdb1996",more.vars), 
    		measure.vars=c("pmdb.vs.1994",
    						"pmdb.vs.1998","pmdb.vs.2010","pmdb.vs.2006","pmdb.vs.2002"))
    tmp15$TT <- as.factor(as.numeric(gsub("\\D","",tmp15$variable)))
    tmp15$variable <- gsub("(\\w)\\.vs.*$","\\1",tmp15$variable,perl=T)
    tmp15$variable <- as.factor(gsub("(\\w)\\..*$","\\1",tmp15$variable,perl=T))
    cast.formula <- formula(paste(
    					    paste("mun","state","pmdb2000","pmdb2004",
    					    "pmdb2008","pmdb1996","TT",sep="+"),
    					    if(weights==T){"+weights~variable"}else{"~variable"},sep=""))
    did15 <- cast(tmp15,cast.formula)  
    did15[,c("pmdb")]<-did15[,c("pmdb")]*100
    did <- did15
    }
    if(pty=="all"|pty==45){
    tmp45 <- melt(x, 
    		id.vars=c("mun","state","psdb2000","psdb2004","psdb2008","psdb1996",more.vars), 
    		measure.vars=c("serra.vs.2010","alkmin.vs.2006","serra.vs.2002",
    						"fhc.vs.1998","fhc.vs.1994","psdb.vs.1994",
    						"psdb.vs.1998","psdb.vs.2010","psdb.vs.2006","psdb.vs.2002"))
    tmp45$TT <- as.factor(as.numeric(gsub("\\D","",tmp45$variable)))
    tmp45$variable <- gsub("(\\w)\\.vs.*$","\\1",tmp45$variable,perl=T)
    tmp45$variable <- gsub("(alkmin)|(serra)|(fhc)","psdbpres",tmp45$variable,perl=T)
    tmp45$variable <- as.factor(gsub("(\\w)\\..*$","\\1",tmp45$variable,perl=T))
    cast.formula <- formula(paste(
    					    paste("mun","state","psdb2000","psdb2004",
    					    "psdb2008","psdb1996","TT",sep="+"),
    					    if(weights==T){"+weights~variable"}else{"~variable"},sep=""))
    did45 <- cast(tmp45,cast.formula)  
    did45[,c("psdbpres","psdb")]<-did45[,c("psdbpres","psdb")]*100
    did <- did45
    }
    if(pty=="all"){
    did <- merge(
    		merge(did13,did15,by=c("mun","state","TT")),
    		did45,by=c("mun","state","TT"));
    		 cat("Returning DID set for PT, PSDB, and PMDB")}
  
    return(as.data.frame(did))
}##end of did.melt()

stack.did <- function(x){  #Assembles table with results
     if(class(x)=="lm"){x<-summary(x)}
     if(class(x)!="summary.lm"){cat("Object of wrong class! \n")}else{
    core<-stack(data.frame(round(rbind(x$coef[,"Estimate"],
    		x$coef[,"Std. Error"],x$coef[,"Pr(>|t|)"]),3)))
    core$ind<-as.character(core$ind)
 names(core)[1]<-"lm"
 core$type <- rep(1:3,nrow(core)/3)
 core$ind <- gsub("\\.","",core$ind,perl=T)
 core$ind <- gsub("TRUE","",core$ind,perl=T)
 core$ind <- gsub("^I","",core$ind,perl=T)
 core$ind <- gsub("asfactor","",core$ind,perl=T)
 core$ind <- gsub("TT\\d\\d\\d\\d","TIME",core$ind,perl=T)
 core$ind <- gsub("p.*\\d\\d\\d\\d","TREAT",core$ind,perl=T)
 core$ind <- gsub("X","",core$ind,perl=T)
 core<-rbind(core,c(length(x$residuals),"N",NA))
 core<-rbind(core,c(length(x$na.action),"Droped",NA))
 core<-rbind(core,c(round(x$adj.r.squared,3),"R2",NA))
 core[,1]<-as.numeric(core[,1])  #to keep numbers as numbers
 return(core)
    }
}## end of stack.did()

fix.did <- function(x){ ### Formats variable names for table for Latex
	    new.order <- c(grep("Intercept",x$ind),  
                   grep("^TIME$",x$ind),
                   grep("^TREAT$",x$ind),
                   grep("TIMETREAT",x$ind),
    	           grep("^N",x$ind),
                   grep("^Drop",x$ind),
                   grep("^R",x$ind))
    new.x <-x[new.order,]
    new.x$ind <- gsub("TIMETREAT","TIME$\times$TREAT",new.x$ind)
    #new.x$ind <- gsub("T","",new.x$ind)
        new.x$ind <- ifelse(new.x$type==1,new.x$ind,NA)
    return(new.x[,-2])
}## end of fix.did()
    


##### LOAD THE DATASET FOR ANALYSIS ############################
load("__BJPS_replication/data_electoral.RData") #load the datafile



#### An aside: DID without matching #############################
#### Not used in paper, but presented here for convenience	    #
did <- did.melt(d)  
### To analyze these data, we could proceed as follows (for each party/cycle):
##party vote
summary(lm(pt~TT*pt2000,data=
		subset(did,pt1996==F&(TT==1994|TT==2002))))
##party lingering effects 
summary(lm(pt~TT*pt2000,data=
		subset(did,pt1996==F&(TT==1994|TT==2002|TT==2006|TT==2010))))
#president vote
summary(lm(ptpres~TT*pt2000,data=
		subset(did,pt1996==F&(TT==1994|TT==2002))))
#placebo for party does opening after t4 affect change in peformance between t0 and t3?
summary(lm(pt~TT*pt2004,data=
		subset(did,pt1996==F&pt2000==F&(TT==1994|TT==1998))))



#### MATCHING FOR DID                             ##################
#### To match on pre-treatment covariates 					########
#### The idea is to replicate CEM (approximately) by hand 	########
#### using matchit. This means first creating a matched set ########
#### of the places the parties were not present at t0, then ########
#### producing the DID set from the matched subset of data  ########
#### A separate data set has to be created for EACH 		########
#### electoral cycle/party, as matching is period specific	########
####################################################################

#### Cycle 1 PT: opening between 1996/2000, effects on vs 2002-1994 ###
hdi.cut<- c(0,0.656,0.752,1) ## manually Coarsen some variables 
distcap.cut <- as.numeric(quantile(d$distcap,prob=c(0,1/3,2/3)))
pt.vs.cut <- quantile(d$pt.vs.1994,prob=seq(0,1,length=4)[-4],na.rm=T)
lula.vs.cut <- quantile(d$lula.vs.1994,prob=seq(0,1,length=6)[-6],na.rm=T) 
pop.cut <- quantile(d$pop.1994,prob=seq(0,1,length=4)[-4],na.rm=T) 					
d$hdi.cut <- findInterval(d$hdi.1991,hdi.cut)          
d$pt.vs.cut <- findInterval(d$pt.vs.1994,pt.vs.cut) 
d$lula.vs.cut <- findInterval(d$lula.vs.1994,lula.vs.cut) 
d$distcap.cut <- findInterval(d$distcap,distcap.cut)
d$pop.cut <- findInterval(d$pop.1994,pop.cut)

pre.vars<- c("lula.vs.1994","pt.vs.1994","hdi.1991","pop.1994",
             "distcap","region","ptgov.1994","ngodensity.1996")
trt.vars <- c("pt2000","pt1996")
cem.vars  <- c("lula.vs.cut","pt.vs.cut","hdi.cut","distcap.cut","pop.cut")
out.vars <- c("pt2000","pt2004","pt2008","dilma.vs.2010","lula.vs.2006","lula.vs.2002",
    						"lula.vs.1998","lula.vs.1994","pt.vs.1994",
    						"pt.vs.1998","pt.vs.2010","pt.vs.2006","pt.vs.2002") 
tmp <- na.omit(d[d$pt1996==F,
               c("mun","state",
               union(union(trt.vars,pre.vars),out.vars),
               cem.vars)])
matching.00 <- matchit(pt2000~pt.vs.1994+lula.vs.1994+hdi.1991+
				log(pop.1994)+distcap,data=tmp,
				exact=c("region",cem.vars),replace=T)
summary(matching.00)$nn
dm1300 <- match.data(matching.00)
	## Checking Balance
	the.formula <- formula(paste("pt2000~pt.vs.1994+lula.vs.1994+hdi.1991+
				log(pop.1994)+distcap+",paste(cem.vars,collapse="+")))
	thebal<-xBalance(the.formula,data=dm1300,report="all")
	plot(thebal)
	cat("Bowler and Hanson omnibus test\n");print(thebal$overall)


#### Cycle 2 PT: opening between 2000/2004, effects on vs 2006-1998 ###
hdi.cut<- c(0,0.656,0.752,1) ## manually Coarsen some variables 
distcap.cut <- as.numeric(quantile(d$distcap,prob=c(0,1/3,2/3)))
pt.vs.cut <- quantile(d$pt.vs.1998,prob=seq(0,1,length=4)[-4],na.rm=T)
lula.vs.cut <- quantile(d$lula.vs.1998,prob=seq(0,1,length=6)[-6],na.rm=T) 
pop.cut <- quantile(d$pop.1998,prob=seq(0,1,length=4)[-4],na.rm=T) 						
d$hdi.cut <- findInterval(d$hdi.2000,hdi.cut)          
d$pt.vs.cut <- findInterval(d$pt.vs.1998,pt.vs.cut) 
d$lula.vs.cut <- findInterval(d$lula.vs.1998,lula.vs.cut) 
d$distcap.cut <- findInterval(d$distcap,distcap.cut)
d$pop.cut <- findInterval(d$pop.1998,pop.cut)

pre.vars<- c("lula.vs.1998","pt.vs.1998","hdi.2000","pop.1998",
             "distcap","region","ptgov.1998","ngodensity.1996")
trt.vars <- c("pt2004","pt2000")
cem.vars  <- c("lula.vs.cut","pt.vs.cut","hdi.cut","distcap.cut","pop.cut")
out.vars <- c("pt1996","pt2000","pt2004","pt2008","dilma.vs.2010","lula.vs.2006",
							"lula.vs.2002",
    						"lula.vs.1998","lula.vs.1994","pt.vs.1994",
    						"pt.vs.1998","pt.vs.2010","pt.vs.2006","pt.vs.2002") 
tmp <- na.omit(d[d$pt2000==F,
               c("mun","state",
               union(union(trt.vars,pre.vars),out.vars),
               cem.vars)])
matching.04 <- matchit(pt2004~pt.vs.1998+lula.vs.1998+hdi.2000+
				log(pop.1998)+distcap,data=tmp,
				exact=c("region",cem.vars),replace=T)
dm1304 <- match.data(matching.04)
summary(matching.04)$nn
	## Checking Balance
	the.formula <- formula(paste("pt2004~pt.vs.1998+lula.vs.1998+hdi.2000+
				log(pop.1998)+distcap+",paste(cem.vars,collapse="+")))
	thebal<-xBalance(the.formula,data=dm1304,report="all")
	plot(thebal)
	cat("Bowler and Hanson omnibus test\n");print(thebal$overall)

#### Cycle 3 PT: opening between 2004/2008, effects on vs 2010-2002 ###
hdi.cut<- c(0,0.656,0.752,1) ## manually Coarsen some variables 
distcap.cut <- as.numeric(quantile(d$distcap,prob=c(0,1/3,2/3)))
pt.vs.cut <- quantile(d$pt.vs.2002,prob=seq(0,1,length=4)[-4],na.rm=T)
lula.vs.cut <- quantile(d$lula.vs.2002,prob=seq(0,1,length=6)[-6],na.rm=T) 
pop.cut <- quantile(d$pop.2002,prob=seq(0,1,length=4)[-4],na.rm=T) 						
d$hdi.cut <- findInterval(d$hdi.2000,hdi.cut)          
d$pt.vs.cut <- findInterval(d$pt.vs.2002,pt.vs.cut) 
d$lula.vs.cut <- findInterval(d$lula.vs.2002,lula.vs.cut) 
d$distcap.cut <- findInterval(d$distcap,distcap.cut)
d$pop.cut <- findInterval(d$pop.2002,pop.cut)

pre.vars<- c("lula.vs.2002","pt.vs.2002","hdi.2000","pop.2002",
             "distcap","region","ptgov.2002","ngodensity.2002")
trt.vars <- c("pt2008","pt2004")
cem.vars  <- c("lula.vs.cut","pt.vs.cut","hdi.cut","distcap.cut","pop.cut")
out.vars <- c("pt1996","pt2000","pt2004","pt2008","dilma.vs.2010","lula.vs.2006",
							"lula.vs.2002",
    						"lula.vs.1998","lula.vs.1994","pt.vs.1994",
    						"pt.vs.1998","pt.vs.2010","pt.vs.2006","pt.vs.2002") 
tmp <- na.omit(d[d$pt2004==F,
               c("mun","state",
               union(union(trt.vars,pre.vars),out.vars),
               cem.vars)])
matching.08 <- matchit(pt2008~pt.vs.2002+lula.vs.2002+hdi.2000+
				log(pop.2002)+distcap,data=tmp,
				exact=c("region",cem.vars),replace=T)
dm1308 <- match.data(matching.08)
summary(matching.08)$nn
	## Checking Balance
	the.formula <- formula(paste("pt2008~pt.vs.2002+lula.vs.2002+hdi.2000+
				log(pop.2002)+distcap+",paste(cem.vars,collapse="+")))
	thebal<-xBalance(the.formula,data=dm1308,report="all")
	plot(thebal)
	cat("Bowler and Hanson omnibus test\n");print(thebal$overall)

#### Cycle 1 PSDB: opening between 1996/2000, effects on vs 2002-1994 ###
hdi.cut<- c(0,0.656,0.752,1) ## manually Coarsen some variables 
distcap.cut <- as.numeric(quantile(d$distcap,prob=c(0,1/3,2/3)))
psdb.vs.cut <- quantile(d$psdb.vs.1994,prob=seq(0,1,length=4)[-4],na.rm=T)
fhc.vs.cut <- quantile(d$fhc.vs.1994,prob=seq(0,1,length=6)[-6],na.rm=T) 
pop.cut <- quantile(d$pop.1994,prob=seq(0,1,length=4)[-4],na.rm=T) 						
d$hdi.cut <- findInterval(d$hdi.1991,hdi.cut)          
d$psdb.vs.cut <- findInterval(d$psdb.vs.1994,psdb.vs.cut) 
d$fhc.vs.cut <- findInterval(d$fhc.vs.1994,fhc.vs.cut) 
d$distcap.cut <- findInterval(d$distcap,distcap.cut)
d$pop.cut <- findInterval(d$pop.1994,pop.cut)

pre.vars<- c("fhc.vs.1994","psdb.vs.1994","hdi.1991","pop.1994",
             "distcap","region","psdbgov.1994","ngodensity.1996")
trt.vars <- c("psdb2000","psdb1996")
cem.vars  <- c("fhc.vs.cut","psdb.vs.cut","hdi.cut","distcap.cut","pop.cut")
out.vars <- c("psdb2000","psdb2004","psdb2008","serra.vs.2010","alkmin.vs.2006","serra.vs.2002",
    						"fhc.vs.1998","fhc.vs.1994","psdb.vs.1994",
    						"psdb.vs.1998","psdb.vs.2010","psdb.vs.2006","psdb.vs.2002") 
tmp <- na.omit(d[d$psdb1996==F,
               c("mun","state",
               union(union(trt.vars,pre.vars),out.vars),
               cem.vars)])
matching.00 <- matchit(psdb2000~psdb.vs.1994+fhc.vs.1994+hdi.1991+
				log(pop.1994)+distcap,data=tmp,
				exact=c("region",cem.vars),replace=T)
print(summary(matching.00)$nn)
dm4500 <- match.data(matching.00)
	## Checking Balance
	the.formula <- formula(paste("psdb2000~psdb.vs.1994+fhc.vs.1994+hdi.1991+
				log(pop.1994)+distcap+",paste(cem.vars,collapse="+")))
	thebal<-xBalance(the.formula,data=dm4500,report="all")
	plot(thebal)
	cat("Bowler and Hanson omnibus test\n");print(thebal$overall)


#### Cycle 2 PSDB: opening between 2000/2004, effects on vs 2006-1998 ###
hdi.cut<- c(0,0.656,0.752,1) ## manually Coarsen some variables 
distcap.cut <- as.numeric(quantile(d$distcap,prob=c(0,1/3,2/3)))
psdb.vs.cut <- quantile(d$psdb.vs.1998,prob=seq(0,1,length=4)[-4],na.rm=T)
fhc.vs.cut <- quantile(d$fhc.vs.1998,prob=seq(0,1,length=6)[-6],na.rm=T) 
pop.cut <- quantile(d$pop.1998,prob=seq(0,1,length=4)[-4],na.rm=T) 						
d$hdi.cut <- findInterval(d$hdi.2000,hdi.cut)          
d$psdb.vs.cut <- findInterval(d$psdb.vs.1998,psdb.vs.cut) 
d$fhc.vs.cut <- findInterval(d$fhc.vs.1998,fhc.vs.cut) 
d$distcap.cut <- findInterval(d$distcap,distcap.cut)
d$pop.cut <- findInterval(d$pop.1998,pop.cut)

pre.vars<- c("fhc.vs.1998","psdb.vs.1998","hdi.2000","pop.1998",
             "distcap","region","psdbgov.1998","ngodensity.1996")
trt.vars <- c("psdb2004","psdb2000")
cem.vars  <- c("fhc.vs.cut","psdb.vs.cut","hdi.cut","distcap.cut","pop.cut")
out.vars <- c("psdb1996","psdb2000","psdb2004","psdb2008","serra.vs.2010","alkmin.vs.2006",
							"serra.vs.2002",
    						"fhc.vs.1998","fhc.vs.1994","psdb.vs.1994",
    						"psdb.vs.1998","psdb.vs.2010","psdb.vs.2006","psdb.vs.2002") 
tmp <- na.omit(d[d$psdb2000==F,
               c("mun","state",
               union(union(trt.vars,pre.vars),out.vars),
               cem.vars)])
matching.04 <- matchit(psdb2004~psdb.vs.1998+fhc.vs.1998+hdi.2000+
				log(pop.1998)+distcap,data=tmp,
				exact=c("region",cem.vars),replace=T)
dm4504 <- match.data(matching.04)
summary(matching.04)$nn
	## Checking Balance
	the.formula <- formula(paste("psdb2004~psdb.vs.1998+fhc.vs.1998+hdi.2000+
				log(pop.1998)+distcap+",paste(cem.vars,collapse="+")))
	thebal<-xBalance(the.formula,data=dm4504,report="all")
	plot(thebal)
	cat("Bowler and Hanson omnibus test\n");print(thebal$overall)

#### Cycle 3 PSDB: opening between 2004/2008, effects on vs 2010-2002 ###
hdi.cut<- c(0,0.656,0.752,1) ## manually Coarsen some variables 
distcap.cut <- as.numeric(quantile(d$distcap,prob=c(0,1/3,2/3)))
psdb.vs.cut <- quantile(d$psdb.vs.2002,prob=seq(0,1,length=4)[-4],na.rm=T)
serra.vs.cut <- quantile(d$serra.vs.2002,prob=seq(0,1,length=6)[-6],na.rm=T) 
pop.cut <- quantile(d$pop.2002,prob=seq(0,1,length=4)[-4],na.rm=T) 						
d$hdi.cut <- findInterval(d$hdi.2000,hdi.cut)          
d$psdb.vs.cut <- findInterval(d$psdb.vs.2002,psdb.vs.cut) 
d$serra.vs.cut <- findInterval(d$serra.vs.2002,serra.vs.cut) 
d$distcap.cut <- findInterval(d$distcap,distcap.cut)
d$pop.cut <- findInterval(d$pop.2002,pop.cut)

pre.vars<- c("serra.vs.2002","psdb.vs.2002","hdi.2000","pop.2002",
             "distcap","region","psdbgov.2002","ngodensity.2002")
trt.vars <- c("psdb2008","psdb2004")
cem.vars  <- c("serra.vs.cut","psdb.vs.cut","hdi.cut","distcap.cut","pop.cut")
out.vars <- c("psdb1996","psdb2000","psdb2004","psdb2008","serra.vs.2010","alkmin.vs.2006",
							"serra.vs.2002",
    						"fhc.vs.1998","fhc.vs.1994","psdb.vs.1994",
    						"psdb.vs.1998","psdb.vs.2010","psdb.vs.2006","psdb.vs.2002") 
tmp <- na.omit(d[d$psdb2004==F,
               c("mun","state",
               union(union(trt.vars,pre.vars),out.vars),
               cem.vars)])
matching.08 <- matchit(psdb2008~psdb.vs.2002+serra.vs.2002+hdi.2000+
				log(pop.2002)+distcap,data=tmp,
				exact=c("region",cem.vars),replace=T)
dm4508 <- match.data(matching.08)
summary(matching.08)$nn
	## Checking Balance
	the.formula <- formula(paste("psdb2008~psdb.vs.2002+serra.vs.2002+hdi.2000+
				log(pop.2002)+distcap+",paste(cem.vars,collapse="+")))
	thebal<-xBalance(the.formula,data=dm4508,report="all")
	plot(thebal)
	cat("Bowler and Hanson omnibus test\n");print(thebal$overall)


#### Cycle 1 PMDB: opening between 1996/2000, effects on vs 2002-1994 ###
hdi.cut<- c(0,0.656,0.752,1) ## manually Coarsen some variables 
distcap.cut <- as.numeric(quantile(d$distcap,prob=c(0,1/3,2/3)))
pmdb.vs.cut <- quantile(d$pmdb.vs.1994,prob=seq(0,1,length=4)[-4],na.rm=T)
pop.cut <- quantile(d$pop.1994,prob=seq(0,1,length=4)[-4],na.rm=T) 						
d$hdi.cut <- findInterval(d$hdi.1991,hdi.cut)          
d$pmdb.vs.cut <- findInterval(d$pmdb.vs.1994,pmdb.vs.cut) 
d$distcap.cut <- findInterval(d$distcap,distcap.cut)
d$pop.cut <- findInterval(d$pop.1994,pop.cut)

pre.vars<- c("pmdb.vs.1994","hdi.1991","pop.1994",
             "distcap","region","pmdbgov.1994","ngodensity.1996")
trt.vars <- c("pmdb2000","pmdb1996")
cem.vars  <- c("pmdb.vs.cut","hdi.cut","distcap.cut","pop.cut")
out.vars <- c("pmdb2000","pmdb2004","pmdb2008","pmdb1996","pmdb.vs.1994",
    						"pmdb.vs.1998","pmdb.vs.2010","pmdb.vs.2006","pmdb.vs.2002") 
tmp <- na.omit(d[d$pmdb1996==F,
               c("mun","state",
               union(union(trt.vars,pre.vars),out.vars),
               cem.vars)])
matching.00 <- matchit(pmdb2000~pmdb.vs.1994+hdi.1991+
				log(pop.1994)+distcap,data=tmp,
				exact=c("region",cem.vars),replace=T)
print(summary(matching.00)$nn)
dm1500 <- match.data(matching.00)
	## Checking Balance
	the.formula <- formula(paste("pmdb2000~pmdb.vs.1994+hdi.1991+
				log(pop.1994)+distcap+",paste(cem.vars,collapse="+")))
	thebal<-xBalance(the.formula,data=dm1500,report="all")
	plot(thebal)
	cat("Bowler and Hanson omnibus test\n");print(thebal$overall)


#### Cycle 2 pmdb: opening between 2000/2004, effects on vs 2006-1998 ###
hdi.cut<- c(0,0.656,0.752,1) ## manually Coarsen some variables 
distcap.cut <- as.numeric(quantile(d$distcap,prob=c(0,1/3,2/3)))
pmdb.vs.cut <- quantile(d$pmdb.vs.1998,prob=seq(0,1,length=4)[-4],na.rm=T)
pop.cut <- quantile(d$pop.1998,prob=seq(0,1,length=4)[-4],na.rm=T) 						
d$hdi.cut <- findInterval(d$hdi.2000,hdi.cut)          
d$distcap.cut <- findInterval(d$distcap,distcap.cut)
d$pop.cut <- findInterval(d$pop.1998,pop.cut)

pre.vars<- c("pmdb.vs.1998","hdi.2000","pop.1998",
             "distcap","region","pmdbgov.1998","ngodensity.1996")
trt.vars <- c("pmdb2004","pmdb2000")
cem.vars  <- c("pmdb.vs.cut","hdi.cut","distcap.cut","pop.cut")
out.vars <- c("pmdb2000","pmdb2004","pmdb2008","pmdb1996","pmdb.vs.1994",
    						"pmdb.vs.1998","pmdb.vs.2010","pmdb.vs.2006","pmdb.vs.2002") 
tmp <- na.omit(d[d$pmdb2000==F,
               c("mun","state",
               union(union(trt.vars,pre.vars),out.vars),
               cem.vars)])
matching.04 <- matchit(pmdb2004~pmdb.vs.1998+hdi.2000+
				log(pop.1998)+distcap,data=tmp,
				exact=c("region",cem.vars),replace=T)
dm1504 <- match.data(matching.04)
summary(matching.04)$nn
	## Checking Balance
	the.formula <- formula(paste("pmdb2004~pmdb.vs.1998+hdi.2000+
				log(pop.1998)+distcap+",paste(cem.vars,collapse="+")))
	thebal<-xBalance(the.formula,data=dm1504,report="all")
	plot(thebal)
	cat("Bowler and Hanson omnibus test\n");print(thebal$overall)

#### Cycle 3 pmdb: opening between 2004/2008, effects on vs 2010-2002 ###
hdi.cut<- c(0,0.656,0.752,1) ## manually Coarsen some variables 
distcap.cut <- as.numeric(quantile(d$distcap,prob=c(0,1/3,2/3)))
pmdb.vs.cut <- quantile(d$pmdb.vs.2002,prob=seq(0,1,length=4)[-4],na.rm=T)
pop.cut <- quantile(d$pop.2002,prob=seq(0,1,length=4)[-4],na.rm=T) 						
d$hdi.cut <- findInterval(d$hdi.2000,hdi.cut)          
d$pmdb.vs.cut <- findInterval(d$pmdb.vs.2002,pmdb.vs.cut) 
d$distcap.cut <- findInterval(d$distcap,distcap.cut)
d$pop.cut <- findInterval(d$pop.2002,pop.cut)

pre.vars<- c("pmdb.vs.2002","hdi.2000","pop.2002",
             "distcap","region","pmdbgov.2002","ngodensity.2002")
trt.vars <- c("pmdb2008","pmdb2004")
cem.vars  <- c("pmdb.vs.cut","hdi.cut","distcap.cut","pop.cut")
out.vars <- c("pmdb2000","pmdb2004","pmdb2008","pmdb1996","pmdb.vs.1994",
    						"pmdb.vs.1998","pmdb.vs.2010","pmdb.vs.2006","pmdb.vs.2002") 
tmp <- na.omit(d[d$pmdb2004==F,
               c("mun","state",
               union(union(trt.vars,pre.vars),out.vars),
               cem.vars)])
matching.08 <- matchit(pmdb2008~pmdb.vs.2002+hdi.2000+
				log(pop.2002)+distcap,data=tmp,
				exact=c("region",cem.vars),replace=T)
dm1508 <- match.data(matching.08)
summary(matching.08)$nn
	## Checking Balance
	the.formula <- formula(paste("pmdb2008~pmdb.vs.2002+hdi.2000+
				log(pop.2002)+distcap+",paste(cem.vars,collapse="+")))
	thebal<-xBalance(the.formula,data=dm1508,report="all")
	plot(thebal)
	cat("Bowler and Hanson omnibus test\n");print(thebal$overall)


######## ANALYSIS OF EACH MATCHED SET ##############
### Analysis for the PT ############################
### Convert to DD panel setup, and analyze 
### Note that because of matching, each set is specific to a treatment/year/party
### Regressions for party, party lingering, party placebo, and president effects
didm1300 <- did.melt(dm1300,weights=T,pty=13) 
didm1304 <- did.melt(dm1304,weights=T,pty=13) 
didm1308 <- did.melt(dm1308,weights=T,pty=13) 

##party
reg13.00 <- summary(lm(pt~TT*pt2000,data=subset(didm1300,pt1996==F&(TT==1994|TT==2002)),
			weights=weights))
reg13.04 <- summary(lm(pt~TT*pt2004,data=subset(didm1304,pt2000==F&(TT==1998|TT==2006)),
			weights=weights))
reg13.08 <- summary(lm(pt~TT*pt2008,data=subset(didm1308,pt2004==F&(TT==2002|TT==2010)),
			weights=weights))

##party lingering effects 
summary(lm(pt~TT*pt2000,data=subset(didm1300,pt1996==F&(TT==1994|TT==2002|TT==2006|TT==2010)),
			weights=weights))
summary(lm(pt~TT*pt2004,data=subset(didm1304,pt2000==F&(TT==1998|TT==2006|TT==2010)),
			weights=weights))

#president
summary(lm(ptpres~TT*pt2000,data=subset(didm1300,pt1996==F&(TT==1994|TT==2002)),
			weights=weights))
summary(lm(ptpres~TT*pt2004,data=subset(didm1304,pt2000==F&(TT==1998|TT==2006)),
			weights=weights))
summary(lm(ptpres~TT*pt2008,data=subset(didm1308,pt2004==F&(TT==2002|TT==2010)),
			weights=weights))

#placebo for party
reg13.00p <- summary(lm(pt~TT*pt2004,
			data=subset(didm1300,pt1996==F&pt2000==F&(TT==1994|TT==1998)),
			weights=weights))
reg13.04p <- summary(lm(pt~TT*pt2008,
			data=subset(didm1304,pt2000==F&pt2004==F&(TT==1998|TT==2002)),
			weights=weights))

####### Analysis for the PSDB #######################
### Convert to DD panel setup, and analyze 
### Not that because of matching, each set is specific to a treatment/year/party
### Regressions for party, party lingering, party placebo, and president effects
didm4500 <- did.melt(dm4500,weights=T,pty=45) 
didm4504 <- did.melt(dm4504,weights=T,pty=45) 
didm4508 <- did.melt(dm4508,weights=T,pty=45) 

##party
reg45.00 <- summary(lm(psdb~TT*psdb2000,data=subset(didm4500,psdb1996==F&(TT==1994|TT==2002)),
			weights=weights))
reg45.04 <-summary(lm(psdb~TT*psdb2004,data=subset(didm4504,psdb2000==F&(TT==1998|TT==2006)),
			weights=weights))
reg45.08 <-summary(lm(psdb~TT*psdb2008,data=subset(didm4508,psdb2004==F&(TT==2002|TT==2010)),
			weights=weights))

##party lingering effects 
summary(lm(psdb~TT*psdb2000,data=subset(didm4500,psdb1996==F&(TT==1994|TT==2002|TT==2006|TT==2010)),
			weights=weights))
summary(lm(psdb~TT*psdb2004,data=subset(didm4504,psdb2000==F&(TT==1998|TT==2006|TT==2010)),
			weights=weights))

#president
summary(lm(psdbpres~TT*psdb2000,data=subset(didm4500,psdb1996==F&(TT==1994|TT==2002)),
			weights=weights))
summary(lm(psdbpres~TT*psdb2004,data=subset(didm4504,psdb2000==F&(TT==1998|TT==2006)),
			weights=weights))
summary(lm(psdbpres~TT*psdb2008,data=subset(didm4508,psdb2004==F&(TT==2002|TT==2010)),
			weights=weights))

#placebo for party
reg45.00p<- summary(lm(psdb~TT*psdb2004,
			data=subset(didm4500,psdb1996==F&psdb2000==F&(TT==1994|TT==1998)),
			weights=weights))
reg45.04p<- summary(lm(psdb~TT*psdb2008,
			data=subset(didm4504,psdb2000==F&psdb2004==F&(TT==1998|TT==2002)),
			weights=weights))


####### Analysis for the PMDB #######################
### Convert to DD panel setup, and analyze 
### Not that because of matching, each set is specific to a treatment/year/party
### Regressions for party, party lingering, party placebo, and president effects
didm1500 <- did.melt(dm1500,weights=T,pty=15) 
didm1504 <- did.melt(dm1504,weights=T,pty=15) 
didm1508 <- did.melt(dm1508,weights=T,pty=15) 

##party
reg15.00 <-summary(lm(pmdb~TT*pmdb2000,data=subset(didm1500,pmdb1996==F&(TT==1994|TT==2002)),
			weights=weights))
reg15.04 <-summary(lm(pmdb~TT*pmdb2004,data=subset(didm1504,pmdb2000==F&(TT==1998|TT==2006)),
			weights=weights))
reg15.08 <-summary(lm(pmdb~TT*pmdb2008,data=subset(didm1508,pmdb2004==F&(TT==2002|TT==2010)),
			weights=weights))

##party lingering effects 
summary(lm(pmdb~TT*pmdb2000,data=subset(didm1500,pmdb1996==F&(TT==1994|TT==2002|TT==2006|TT==2010)),
			weights=weights))
summary(lm(pmdb~TT*pmdb2004,data=subset(didm1504,pmdb2000==F&(TT==1998|TT==2006|TT==2010)),
			weights=weights))

#no president for PMDB

#placebo for party
reg15.00p <-summary(lm(pmdb~TT*pmdb2004,
			data=subset(didm1500,pmdb1996==F&pmdb2000==F&(TT==1994|TT==1998)),
			weights=weights))
reg15.04p <-summary(lm(pmdb~TT*pmdb2008,
			data=subset(didm1504,pmdb2000==F&pmdb2004==F&(TT==1998|TT==2002)),
			weights=weights))

###################################################
### Create table 2 presented in the paper 		###
### Table with matching results for legislative ###
regs13 <- merge(
merge(stack.did(reg13.00),stack.did(reg13.04),by=c("ind","type"),all=T,suffixes=c("2000","2004"))
,stack.did(reg13.08),all=T)
names(regs13)[5]<-"lm2008"
#print(xtable(fix.did(regs13)),include.rownames=F)

regs15 <- merge(
merge(stack.did(reg15.00),stack.did(reg15.04),by=c("ind","type"),all=T,suffixes=c("2000","2004"))
,stack.did(reg15.08),all=T)
names(regs15)[5]<-"lm2008"
#print(xtable(fix.did(regs15)),include.rownames=F)

regs45 <- merge(
merge(stack.did(reg45.00),stack.did(reg45.04),by=c("ind","type"),all=T,suffixes=c("2000","2004"))
,stack.did(reg45.08),all=T)
names(regs45)[5]<-"lm2008"
#print(xtable(fix.did(regs45)),include.rownames=F)

#Collapse all party tables into what is Table 2, in the paper
#Dates are when the party "opened" a branch
#effects are measured at the national election held two years after the opening date. 
regs <- merge(merge(regs13,regs15,by=c("ind","type"),suffixes=c(".PT",".PMDB"),all=T),
regs45,by=c("ind","type"),all=T)
print(xtable(fix.did(regs)),include.rownames=F)

#Table with placebo effect (mannualy merged with effects, above, in table 2 in paper)
regsp <- merge(merge(
	data.frame(merge(stack.did(reg13.00p),stack.did(reg13.04p),
			by=c("ind","type"),all=T,suffixes=c("2000","2004")),lm2008=NA),
	data.frame(merge(stack.did(reg15.00p),stack.did(reg15.04p),
			by=c("ind","type"),all=T,suffixes=c("2000","2004")),lm2008=NA)
	,by=c("ind","type"),suffixes=c(".PT",".PMDB"),all=T),
	data.frame(merge(stack.did(reg45.00p),stack.did(reg45.04p),
			by=c("ind","type"),all=T,suffixes=c("2000","2004")),lm2008=NA)
	,by=c("ind","type"),all=T)
print(xtable(fix.did(regsp)[10:12,]),include.rownames=F)

